In [13]:
import requests
from bs4 import BeautifulSoup 
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import geopandas as gpd
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
In [27]:
#ACS data for population 
#2022 
url = 'https://api.census.gov/data/2022/acs/acs1?'
params = {
    'get': 'NAME,B01001_001E',
    'for': 'state:*'
}
r = requests.get(url, params=params).json()
df = pd.DataFrame(r).drop(0).fillna(0)
df = df.rename(columns={0: 'State', 1: "Population"})
pop_2022=df

#2021
url = 'https://api.census.gov/data/2021/acs/acs1?'
params = {
    'get': 'NAME,B01001_001E',
    'for': 'state:*'
}
r = requests.get(url, params=params).json()
df = pd.DataFrame(r).drop(0).fillna(0)
df = df.rename(columns={0: 'State', 1: "Population"})
pop_2021=df

#2020
df = pd.read_excel("C:/Users/thoma/Downloads/XK200101.xlsx")
state_names = df.iloc[5].tolist()[1::2]
populations = df.iloc[7].tolist()[1::2]
df_new = pd.DataFrame({'State': state_names, 'Population': populations})
pop_2020=df_new

#2019
url = 'https://api.census.gov/data/2019/acs/acs1?'
params = {
    'get': 'NAME,B01001_001E',
    'for': 'state:*'
}
r = requests.get(url, params=params).json()
df = pd.DataFrame(r).drop(0).fillna(0)
df = df.rename(columns={0: 'State', 1: "Population"})
pop_2019=df

# merged_pop
merged_pop = pd.merge(pop_2022, pop_2021, on='State', suffixes=('_2022', '_2021'))
merged_pop = pd.merge(merged_pop, pop_2020, on='State')
merged_pop = pd.merge(merged_pop, pop_2019, on='State', suffixes=('_2020', '_2019'))
In [23]:
#2019 , from bls for employment 
url = "https://data.bls.gov/cew/apps/table_maker/v4/table_maker.htm#type=0&year=2019&qtr=A&own=0&ind=10&supp=0"
options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)
driver.get(url)
wait = WebDriverWait(driver, 10)
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.verdana.md tbody tr')))
html_content = driver.page_source
driver.quit()
soup = BeautifulSoup(html_content, "html.parser")
rows = soup.select('.verdana.md tbody tr')
data = []
for row in rows:
    cells = row.find_all(['th', 'td'])
    if len(cells) >= 3:
        state = cells[0].get_text(strip=True)
        annual_avg_employment = cells[2].get_text(strip=True)
        data.append({'State': state, '2019 Employment': annual_avg_employment})
df_2019 = pd.DataFrame(data)

#2020
url="https://data.bls.gov/cew/apps/table_maker/v4/table_maker.htm#type=0&year=2020&qtr=A&own=0&ind=10&supp=0"
options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)
driver.get(url)
wait = WebDriverWait(driver, 10)
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.verdana.md tbody tr')))
html_content = driver.page_source
driver.quit()
soup = BeautifulSoup(html_content, "html.parser")
rows = soup.select('.verdana.md tbody tr')
data = []
for row in rows:
    cells = row.find_all(['th', 'td'])
    if len(cells) >= 3:
        state = cells[0].get_text(strip=True)
        annual_avg_employment = cells[2].get_text(strip=True)
        data.append({'State': state, '2020 Employment': annual_avg_employment})
df_2020 = pd.DataFrame(data)

#2021
url="https://data.bls.gov/cew/apps/table_maker/v4/table_maker.htm#type=0&year=2021&qtr=A&own=0&ind=10&supp=0"
options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)
driver.get(url)
wait = WebDriverWait(driver, 10)
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.verdana.md tbody tr')))
html_content = driver.page_source
driver.quit()
soup = BeautifulSoup(html_content, "html.parser")
rows = soup.select('.verdana.md tbody tr')
data = []
for row in rows:
    cells = row.find_all(['th', 'td'])
    if len(cells) >= 3:
        state = cells[0].get_text(strip=True)
        annual_avg_employment = cells[2].get_text(strip=True)
        data.append({'State': state, '2021 Employment': annual_avg_employment})
df_2021 = pd.DataFrame(data)

#2022
url="https://data.bls.gov/cew/apps/table_maker/v4/table_maker.htm#type=0&year=2022&qtr=A&own=0&ind=10&supp=0"
options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)
driver.get(url)
wait = WebDriverWait(driver, 10)
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.verdana.md tbody tr')))
html_content = driver.page_source
driver.quit()
soup = BeautifulSoup(html_content, "html.parser")
rows = soup.select('.verdana.md tbody tr')
data = []
for row in rows:
    cells = row.find_all(['th', 'td'])
    if len(cells) >= 3:
        state = cells[0].get_text(strip=True)
        annual_avg_employment = cells[2].get_text(strip=True)
        data.append({'State': state, '2022 Employment': annual_avg_employment})
df_2022 = pd.DataFrame(data)

#merged of bls 
merged_df = pd.merge(df_2019, df_2020, on='State', how='inner', suffixes=('_2019', '_2020'))
merged_df = pd.merge(merged_df, df_2021, on='State', how='inner', suffixes=('_2020', '_2021'))
merged_df = pd.merge(merged_df, df_2022, on='State', how='inner', suffixes=('_2021', '_2022'))
merged_df.columns = ["State", "2019", "2020", "2021", "2022"]
merged_df = merged_df.head(52)
In [17]:
#for bls,unemployment rate=poulation-employment/population
merged_df = merged_df.drop(0)
m_df = pd.merge(merged_df, merged_pop, on='State', how='left')
m_df['Population_2022'] = m_df['Population_2022'].str.replace(',', '').astype(int)
m_df['2022'] = m_df['2022'].str.replace(',', '').astype(int)
m_df['Population_2021'] = m_df['Population_2021'].str.replace(',', '').astype(int)
m_df['2021'] = m_df['2021'].str.replace(',', '').astype(int)
m_df['Population_2020'] = m_df['Population_2020'].str.replace(',', '').astype(int)
m_df['2020'] = m_df['2020'].str.replace(',', '').astype(int)
m_df['Population_2019'] = m_df['Population_2019'].str.replace(',', '').astype(int)
m_df['2019'] = m_df['2019'].str.replace(',', '').astype(int)
m_df['Unemployment Rate 2022'] = ((m_df['Population_2022'] - m_df['2022']) / m_df['Population_2022']) * 100
m_df['Unemployment Rate 2021'] = ((m_df['Population_2021'] - m_df['2021']) / m_df['Population_2021']) * 100
m_df['Unemployment Rate 2020'] = ((m_df['Population_2020'] - m_df['2020']) / m_df['Population_2020']) * 100
m_df['Unemployment Rate 2019'] = ((m_df['Population_2019'] - m_df['2019']) / m_df['Population_2019']) * 100

# interactive map for unemployment 
us_states_geojson_path = 'C:/Users/thoma/Downloads/gz_2010_us_040_00_500k.json'
gdf_states = gpd.read_file(us_states_geojson_path)
gdf_states.rename(columns={'NAME': 'State'}, inplace=True)
merged_df = pd.merge(gdf_states, m_df, on='State', how='left')
fig = px.choropleth(
    merged_df,
    geojson=gdf_states.geometry,
    locations=gdf_states.index,
    color=(merged_df['2022'] - merged_df['2019']) / merged_df['2019'] * 100,  # Calculate unemployment rate change
    hover_name='State',
    hover_data=['2019', '2020', '2021', '2022'],
    title='Unemployment Rate Change (2019-2022) by State',
    color_continuous_scale="Viridis",
    scope="usa"
)
fig.show()

#top five and worst five each year 
top_five_2019 = merged_df.nlargest(5, 'Unemployment Rate 2019')[['State', 'Unemployment Rate 2019']]
worst_five_2019 = merged_df.nsmallest(5, 'Unemployment Rate 2019')[['State', 'Unemployment Rate 2019']]
top_five_2020 = merged_df.nlargest(5, 'Unemployment Rate 2020')[['State', 'Unemployment Rate 2020']]
worst_five_2020 = merged_df.nsmallest(5, 'Unemployment Rate 2020')[['State', 'Unemployment Rate 2020']]
top_five_2021 = merged_df.nlargest(5, 'Unemployment Rate 2021')[['State', 'Unemployment Rate 2021']]
worst_five_2021 = merged_df.nsmallest(5, 'Unemployment Rate 2021')[['State', 'Unemployment Rate 2021']]
top_five_2022 = merged_df.nlargest(5, 'Unemployment Rate 2022')[['State', 'Unemployment Rate 2022']]
worst_five_2022 = merged_df.nsmallest(5, 'Unemployment Rate 2022')[['State', 'Unemployment Rate 2022']]

print("Top Five States with Highest Unemployment Rate (2019):")
print(top_five_2019)
print("\nWorst Five States with Lowest Unemployment Rate (2019):")
print(worst_five_2019)
print("\nTop Five States with Highest Unemployment Rate (2020):")
print(top_five_2020)
print("\nWorst Five States with Lowest Unemployment Rate (2020):")
print(worst_five_2020)
print("\nTop Five States with Highest Unemployment Rate (2021):")
print(top_five_2021)
print("\nWorst Five States with Lowest Unemployment Rate (2021):")
print(worst_five_2021)
print("\nTop Five States with Highest Unemployment Rate (2022):")
print(top_five_2022)
print("\nWorst Five States with Lowest Unemployment Rate (2022):")
print(worst_five_2022)
Top Five States with Highest Unemployment Rate (2019):
            State  Unemployment Rate 2019
38    Mississippi               61.843375
50  West Virginia               61.567829
42     New Mexico               60.098129
20        Arizona               60.036556
21       Arkansas               59.636013

Worst Five States with Lowest Unemployment Rate (2019):
                   State  Unemployment Rate 2019
26  District of Columbia               -9.959915
43          North Dakota               44.514095
1          Massachusetts               47.238079
37             Minnesota               48.538752
40              Nebraska               49.209060

Top Five States with Highest Unemployment Rate (2020):
            State  Unemployment Rate 2020
50  West Virginia               64.028292
38    Mississippi               63.265926
42     New Mexico               62.884474
20        Arizona               61.951119
36      Louisiana               61.669500

Worst Five States with Lowest Unemployment Rate (2020):
                   State  Unemployment Rate 2020
26  District of Columbia               -1.861210
43          North Dakota               48.261944
40              Nebraska               51.065185
1          Massachusetts               51.778395
37             Minnesota               52.174519

Top Five States with Highest Unemployment Rate (2021):
            State  Unemployment Rate 2021
50  West Virginia               63.105938
42     New Mexico               62.347008
38    Mississippi               62.120805
18        Alabama               61.007203
36      Louisiana               60.720641

Worst Five States with Lowest Unemployment Rate (2021):
                   State  Unemployment Rate 2021
26  District of Columbia               -8.324155
43          North Dakota               48.322726
1          Massachusetts               50.520515
40              Nebraska               50.819782
37             Minnesota               51.381262

Top Five States with Highest Unemployment Rate (2022):
            State  Unemployment Rate 2022
50  West Virginia               62.043787
38    Mississippi               61.039769
42     New Mexico               60.590278
18        Alabama               60.071269
44       Oklahoma               59.577467

Worst Five States with Lowest Unemployment Rate (2022):
                   State  Unemployment Rate 2022
26  District of Columbia              -11.451869
43          North Dakota               47.218326
1          Massachusetts               48.501212
40              Nebraska               49.999670
37             Minnesota               50.080914
In [24]:
#bls employment rate 
merged_df.columns = ["State", "STATE 2019", "STATE 2020", "STATE 2021", "STATE 2022"]
merged_df["STATE 2019"] = pd.to_numeric(merged_df["STATE 2019"].replace({",": ""}, regex=True), errors="coerce")
merged_df["STATE 2020"] = pd.to_numeric(merged_df["STATE 2020"].replace({",": ""}, regex=True), errors="coerce")
merged_df["STATE 2021"] = pd.to_numeric(merged_df["STATE 2021"].replace({",": ""}, regex=True), errors="coerce")
merged_df["STATE 2022"] = pd.to_numeric(merged_df["STATE 2022"].replace({",": ""}, regex=True), errors="coerce")
df_rate_changes_merged = pd.DataFrame()
df_rate_changes_merged["2020-2019"] = ((merged_df["STATE 2020"] - merged_df["STATE 2019"]) / merged_df["STATE 2019"]) * 100
df_rate_changes_merged["2021-2020"] = ((merged_df["STATE 2021"] - merged_df["STATE 2020"]) / merged_df["STATE 2020"]) * 100
df_rate_changes_merged["2022-2021"] = ((merged_df["STATE 2022"] - merged_df["STATE 2021"]) / merged_df["STATE 2021"]) * 100
df_rate_changes_merged["State"] = merged_df["State"]

#interactive plot for bls for unemployment rate change
us_states_geojson_path = 'C:/Users/thoma/Downloads/gz_2010_us_040_00_500k.json'
gdf_states = gpd.read_file(us_states_geojson_path)
gdf_states.rename(columns={'NAME': 'State'}, inplace=True)
merged_df = pd.merge(gdf_states, df_rate_changes_merged, on='State', how='left')

#2020-20
fig = px.choropleth(
    merged_df,
    geojson=gdf_states.geometry,
    locations=gdf_states.index,
    color='2020-2019',
    hover_name='State',
    hover_data=['2020-2019'],
    title='Employment Rate Change (2020-2019) by State',
    color_continuous_scale="Viridis",
    scope="usa"
)
fig.show()

#2021-2020
fig = px.choropleth(
    merged_df,
    geojson=gdf_states.geometry,
    locations=gdf_states.index,
    color='2021-2020',
    hover_name='State',
    hover_data=['2021-2020'],
    title='Employment Rate Change (2021-2020) by State',
    color_continuous_scale="Viridis",
    scope="usa"
)
fig.show()

#2022-2021
fig = px.choropleth(
    merged_df,
    geojson=gdf_states.geometry,
    locations=gdf_states.index,
    color='2022-2021',
    hover_name='State',
    hover_data=['2022-2021'],
    title='Employment Rate Change (2022-2021) by State',
    color_continuous_scale="Viridis",
    scope="usa"
)
fig.show()
In [ ]:
# Find top and worst five states for each year
top_five_2019 = df_rate_changes_merged.nlargest(5, "2020-2019")
worst_five_2019 = df_rate_changes_merged.nsmallest(5, "2020-2019")
top_five_2020 = df_rate_changes_merged.nlargest(5, "2021-2020")
worst_five_2020 = df_rate_changes_merged.nsmallest(5, "2021-2020")
top_five_2021 = df_rate_changes_merged.nlargest(5, "2022-2021")
worst_five_2021 = df_rate_changes_merged.nsmallest(5, "2022-2021")

# Plot
for i, (top, worst, year) in enumerate(zip([top_five_2019, top_five_2020, top_five_2021],
                                           [worst_five_2019, worst_five_2020, worst_five_2021],
                                           ["2020-2019", "2021-2020", "2022-2021"])):
    plt.figure(figsize=(15, 6))
    plt.bar(top["State"], top[year], color="green", label="Top 5")
    plt.bar(worst["State"], worst[year], color="red", label="Worst 5")
    plt.title(f"Top and Worst Five States for Employment Rate Change {year} (in %)")
    plt.xlabel("State")
    plt.ylabel("Rate Change (%)")
    plt.legend()
    plt.show()
    print(f"Top Five States for Employment Rate Change {year}:")
    print(top)
    print("\nWorst Five States for Employment Rate Change {year}:")
    print(worst)
    print("\n" + "-" * 50)
In [ ]:
#seperated by region
state_to_region = {
    'Alabama': 'South', 'Alaska': 'West', 'Arizona': 'West', 'Arkansas': 'South',
    'California': 'West', 'Colorado': 'West', 'Connecticut': 'Northeast', 'Delaware': 'South',
    'District of Columbia': 'Northeast', 'Florida': 'South', 'Georgia': 'South', 'Hawaii': 'West',
    'Idaho': 'West', 'Illinois': 'Midwest', 'Indiana': 'Midwest', 'Iowa': 'Midwest', 'Kansas': 'Midwest',
    'Kentucky': 'South', 'Louisiana': 'South', 'Maine': 'Northeast', 'Maryland': 'South',
    'Massachusetts': 'Northeast', 'Michigan': 'Midwest', 'Minnesota': 'Midwest', 'Mississippi': 'South',
    'Missouri': 'Midwest', 'Montana': 'West', 'Nebraska': 'Midwest', 'Nevada': 'West',
    'New Hampshire': 'Northeast', 'New Jersey': 'Northeast', 'New Mexico': 'West', 'New York': 'Northeast',
    'North Carolina': 'South', 'North Dakota': 'Midwest', 'Ohio': 'Midwest', 'Oklahoma': 'South',
    'Oregon': 'West', 'Pennsylvania': 'Northeast', 'Rhode Island': 'Northeast', 'South Carolina': 'South',
    'South Dakota': 'Midwest', 'Tennessee': 'South', 'Texas': 'South', 'Utah': 'West', 'Vermont': 'Northeast',
    'Virginia': 'South', 'Washington': 'West', 'West Virginia': 'South', 'Wisconsin': 'Midwest', 'Wyoming': 'West'
}

df_rate_changes_merged['Region'] = df_rate_changes_merged['State'].map(state_to_region)
df_rate_changes_merged['Region'] = df_rate_changes_merged['State'].map(state_to_region)
df_rate_changes_merged = df_rate_changes_merged.sort_values('Region')
df_rate_changes_merged = df_rate_changes_merged.iloc[:-1]
region_means = df_rate_changes_merged.groupby('Region')[["2020-2019", "2021-2020", "2022-2021"]].mean()

# Overall trend for 2020-2019
plt.figure(figsize=(12, 6))
sns.lineplot(data=df_rate_changes_merged, x='Region', y='2020-2019')
plt.title('Overall Trend for 2020-2019')
plt.ylabel('Employment Rate Change')
plt.xlabel('Region')
plt.xticks(rotation=45)
plt.show()

# Overall trend for 2021-2020
plt.figure(figsize=(12, 6))
sns.lineplot(data=df_rate_changes_merged, x='Region', y='2021-2020')
plt.title('Overall Trend for 2021-2020')
plt.ylabel('Employment Rate Change')
plt.xlabel('Region')
plt.xticks(rotation=45)
plt.show()

# Overall trend for 2022-2021
plt.figure(figsize=(12, 6))
sns.lineplot(data=df_rate_changes_merged, x='Region', y='2022-2021')
plt.title('Overall Trend for 2022-2021')
plt.ylabel('Employment Rate Change')
plt.xlabel('Region')
plt.xticks(rotation=45)
plt.show()
In [25]:
#bea for employment 
options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)
url="https://apps.bea.gov/itable/?ReqID=70&step=1&_gl=1*1ntcawo*_ga*NzUyMDc4MzIzLjE3MDk0MzIyNTM.*_ga_J4698JNNFT*MTcwOTQzMjI1My4xLjEuMTcwOTQzMzExMS42MC4wLjA.#eyJhcHBpZCI6NzAsInN0ZXBzIjpbMSwyOSwyNSwzMSwyNiwyNywzMF0sImRhdGEiOltbIlRhYmxlSWQiLCI2MDAiXSxbIk1ham9yX0FyZWEiLCIwIl0sWyJTdGF0ZSIsWyIwIl1dLFsiQXJlYSIsWyJYWCJdXSxbIlN0YXRpc3RpYyIsWyIxNSJdXSxbIlVuaXRfb2ZfbWVhc3VyZSIsIkxldmVscyJdLFsiWWVhciIsWyIyMDIyIiwiMjAyMSIsIjIwMjAiLCIyMDE5Il1dLFsiWWVhckJlZ2luIiwiLTEiXSxbIlllYXJfRW5kIiwiLTEiXV19"
driver.get(url)
wait = WebDriverWait(driver, 10)
wait.until(EC.presence_of_element_located((By.XPATH, "//table")))
html_content = driver.page_source
soup = BeautifulSoup(html_content, "html.parser")
tables = soup.find_all("table")
data = []
for table in tables:
    rows = table.find_all("tr")
    for row in rows:
        cells = row.find_all(["th", "td"])
        row_data = [cell.get_text(strip=True) for cell in cells]
        data.append(row_data)
driver.quit()
df2 = pd.DataFrame(data)
df2 = df2.iloc[1:]
df2 = df2.drop(columns=[0])  
In [35]:
#for bea,unemployment rate=poulation-employment/population
df2.columns = ["State", "2019", "2020", "2021", "2022"]
m_df = pd.merge(df2, merged_pop, on='State', how='left')
m_df = m_df.drop(0)
m_df['Population_2022'] = m_df['Population_2022'].str.replace(',', '').astype(int)
m_df['2022'] = m_df['2022'].str.replace(',', '').astype(int)

m_df['Population_2021'] = m_df['Population_2021'].str.replace(',', '').astype(int)
m_df['2021'] = m_df['2021'].str.replace(',', '').astype(int)

m_df['Population_2020'] = m_df['Population_2020'].str.replace(',', '').astype(int)
m_df['2020'] = m_df['2020'].str.replace(',', '').astype(int)

m_df['Population_2019'] = m_df['Population_2019'].str.replace(',', '').astype(int)
m_df['2019'] = m_df['2019'].str.replace(',', '').astype(int)

m_df['Unemployment Rate 2022'] = ((m_df['Population_2022'] - m_df['2022']) / m_df['Population_2022']) * 100
m_df['Unemployment Rate 2021'] = ((m_df['Population_2021'] - m_df['2021']) / m_df['Population_2021']) * 100
m_df['Unemployment Rate 2020'] = ((m_df['Population_2020'] - m_df['2020']) / m_df['Population_2020']) * 100
m_df['Unemployment Rate 2019'] = ((m_df['Population_2019'] - m_df['2019']) / m_df['Population_2019']) * 100

# interactive map for unemployment 
us_states_geojson_path = 'C:/Users/thoma/Downloads/gz_2010_us_040_00_500k.json'
gdf_states = gpd.read_file(us_states_geojson_path)
gdf_states.rename(columns={'NAME': 'State'}, inplace=True)
merged_df = pd.merge(gdf_states, m_df, on='State', how='left')
fig = px.choropleth(
    merged_df,
    geojson=gdf_states.geometry,
    locations=gdf_states.index,
    color=(merged_df['2022'] - merged_df['2019']) / merged_df['2019'] * 100,  # Calculate unemployment rate change
    hover_name='State',
    hover_data=['2019', '2020', '2021', '2022'],
    title='Unemployment Rate Change (2019-2022) by State',
    color_continuous_scale="Viridis",
    scope="usa"
)
fig.show()

#top five and worst five each year 
top_five_2019 = merged_df.nlargest(5, 'Unemployment Rate 2019')[['State', 'Unemployment Rate 2019']]
worst_five_2019 = merged_df.nsmallest(5, 'Unemployment Rate 2019')[['State', 'Unemployment Rate 2019']]
top_five_2020 = merged_df.nlargest(5, 'Unemployment Rate 2020')[['State', 'Unemployment Rate 2020']]
worst_five_2020 = merged_df.nsmallest(5, 'Unemployment Rate 2020')[['State', 'Unemployment Rate 2020']]
top_five_2021 = merged_df.nlargest(5, 'Unemployment Rate 2021')[['State', 'Unemployment Rate 2021']]
worst_five_2021 = merged_df.nsmallest(5, 'Unemployment Rate 2021')[['State', 'Unemployment Rate 2021']]
top_five_2022 = merged_df.nlargest(5, 'Unemployment Rate 2022')[['State', 'Unemployment Rate 2022']]
worst_five_2022 = merged_df.nsmallest(5, 'Unemployment Rate 2022')[['State', 'Unemployment Rate 2022']]
print("Top Five States with Highest Unemployment Rate (2019):")
print(top_five_2019)
print("\nWorst Five States with Lowest Unemployment Rate (2019):")
print(worst_five_2019)
print("\nTop Five States with Highest Unemployment Rate (2020):")
print(top_five_2020)
print("\nWorst Five States with Lowest Unemployment Rate (2020):")
print(worst_five_2020)
print("\nTop Five States with Highest Unemployment Rate (2021):")
print(top_five_2021)
print("\nWorst Five States with Lowest Unemployment Rate (2021):")
print(worst_five_2021)
print("\nTop Five States with Highest Unemployment Rate (2022):")
print(top_five_2022)
print("\nWorst Five States with Lowest Unemployment Rate (2022):")
print(worst_five_2022)
Top Five States with Highest Unemployment Rate (2019):
            State  Unemployment Rate 2019
50  West Virginia               50.805710
42     New Mexico               46.736525
20        Arizona               46.007724
38    Mississippi               45.993699
21       Arkansas               44.863218

Worst Five States with Lowest Unemployment Rate (2019):
                   State  Unemployment Rate 2019
26  District of Columbia              -29.679532
43          North Dakota               23.706208
1          Massachusetts               29.301344
51               Wyoming               29.496906
48               Vermont               30.536917

Top Five States with Highest Unemployment Rate (2020):
            State  Unemployment Rate 2020
50  West Virginia               52.887125
42     New Mexico               49.222719
20        Arizona               47.179340
38    Mississippi               46.969868
2        Michigan               45.941814

Worst Five States with Lowest Unemployment Rate (2020):
                   State  Unemployment Rate 2020
26  District of Columbia              -21.501902
43          North Dakota               26.768403
51               Wyoming               31.017968
47          South Dakota               32.089713
40              Nebraska               32.613335

Top Five States with Highest Unemployment Rate (2021):
            State  Unemployment Rate 2021
50  West Virginia               51.512682
42     New Mexico               48.359994
18        Alabama               45.048976
38    Mississippi               44.891041
2        Michigan               44.266816

Worst Five States with Lowest Unemployment Rate (2021):
                   State  Unemployment Rate 2021
26  District of Columbia              -29.801955
43          North Dakota               26.726180
51               Wyoming               27.362332
47          South Dakota               30.139964
23              Colorado               31.368554

Top Five States with Highest Unemployment Rate (2022):
            State  Unemployment Rate 2022
50  West Virginia               49.829198
42     New Mexico               46.081282
18        Alabama               43.441790
38    Mississippi               43.057499
21       Arkansas               42.358988

Worst Five States with Lowest Unemployment Rate (2022):
                   State  Unemployment Rate 2022
26  District of Columbia              -34.676237
43          North Dakota               24.567122
51               Wyoming               24.909139
23              Colorado               28.287721
1          Massachusetts               28.354875
In [36]:
# for bea, unemployment change rate 
df2.columns = ["state", "2019", "2020", "2021", "2022"]
df2 = df2[(df2["state"] != "GeoName") & (df2["state"] != "state")]
for year in ["2019", "2020", "2021", "2022"]:
    df2[year] = df2[year].astype(str).str.replace(",", "")
df2["2019"] = pd.to_numeric(df2["2019"], errors="coerce")
df2["2020"] = pd.to_numeric(df2["2020"], errors="coerce")
df2["2021"] = pd.to_numeric(df2["2021"], errors="coerce")
df2["2022"] = pd.to_numeric(df2["2022"], errors="coerce")

df_rate_changes = pd.DataFrame()
for year in ["2020", "2021", "2022"]:
    previous_year = str(int(year) - 1)
    new_column_name = f"{year}-{previous_year}"
    df_rate_changes[new_column_name] = ((df2[year] - df2[previous_year]) / df2[previous_year]) * 100
df_rate_changes["state"] = df2["state"]
df_rate_changes = df_rate_changes.drop(df_rate_changes.index[0])

#interactive plot for bea
#2020-2019
us_states_geojson_path = 'C:/Users/thoma/Downloads/gz_2010_us_040_00_500k.json'
gdf_states = gpd.read_file(us_states_geojson_path)
gdf_states.rename(columns={'NAME': 'state'}, inplace=True)
merged_df = pd.merge(gdf_states, df_rate_changes, on='state', how='left')
fig = px.choropleth(
    merged_df,
    geojson=gdf_states.geometry,
    locations=gdf_states.index,
    color='2020-2019',
    hover_name='state',
    hover_data=['2020-2019'],
    title='Employment Rate Change (2020-2019) by State',
    color_continuous_scale="Viridis",
    scope="usa"
)
fig.show()

#2021-2020
fig = px.choropleth(
    merged_df,
    geojson=gdf_states.geometry,
    locations=gdf_states.index,
    color='2021-2020',
    hover_name='state',
    hover_data=['2021-2020'],
    title='Employment Rate Change (2021-2020) by State',
    color_continuous_scale="Viridis",
    scope="usa"
)
fig.show()

#2022-2021
fig = px.choropleth(
    merged_df,
    geojson=gdf_states.geometry,
    locations=gdf_states.index,
    color='2022-2021',
    hover_name='state',
    hover_data=['2022-2021'],
    title='Employment Rate Change (2022-2021) by State',
    color_continuous_scale="Viridis",
    scope="usa"
)
fig.show()
In [ ]:
#region for bea
df_rate_changes['Region'] = df_rate_changes['state'].map(state_to_region)
df_rate_changes= df_rate_changes.sort_values('Region')
df_rate_changes = df_rate_changes.iloc[:-1]
region_means = df_rate_changes.groupby('Region')[["2020-2019", "2021-2020", "2022-2021"]].mean()

# Overall trend for 2020-2019
plt.figure(figsize=(12, 6))
sns.lineplot(data=df_rate_changes, x='Region', y='2020-2019')
plt.title('Overall Trend for 2020-2019')
plt.ylabel('Employment Rate Change')
plt.xlabel('Region')
plt.xticks(rotation=45)
plt.show()

# Overall trend for 2021-2020
plt.figure(figsize=(12, 6))
sns.lineplot(data=df_rate_changes, x='Region', y='2021-2020')
plt.title('Overall Trend for 2021-2020')
plt.ylabel('Employment Rate Change')
plt.xlabel('Region')
plt.xticks(rotation=45)
plt.show()

# Overall trend for 2022-2021
plt.figure(figsize=(12, 6))
sns.lineplot(data=df_rate_changes, x='Region', y='2022-2021')
plt.title('Overall Trend for 2022-2021')
plt.ylabel('Employment Rate Change')
plt.xlabel('Region')
plt.xticks(rotation=45)
plt.show()

#top and worst five 
for year in ["2020-2019", "2021-2020", "2022-2021"]:
    sorted_df = df_rate_changes.sort_values(by=year, ascending=False)
    top_five = sorted_df.head(5)
    worst_five = sorted_df.tail(5)
    print(f"\nTop Five States in {year}:\n{top_five.iloc[:, [0, -1]]}")
    print(f"\nWorst Five States in {year}:\n{worst_five.iloc[:, [0, -1]]}")
    fig, ax = plt.subplots(figsize=(18, 6))
    ax.bar(top_five["state"], top_five[year], label="Top Five", color="green")
    ax.bar(worst_five["state"], worst_five[year], label="Worst Five", color="red")
    ax.set_title(f"Top and Worst Five Employment Rate Changes for U.S. States ({year})")
    ax.set_xlabel("State")
    ax.set_ylabel("Employment Rate Change (%)")
    ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
    plt.show()
In [ ]: